# 调用本地部署的Qwen模型
import time
from datetime import datetime

from openai import OpenAI

# Connection settings for the OpenAI-compatible endpoint that serves the
# locally deployed model.
_BASE_URL = "http://192.168.80.35:8000/v1"
# NOTE(review): presumably a placeholder token accepted by the local server
# rather than a real secret -- confirm before sharing this file.
_API_KEY = "token-abc123"

# Shared client instance used by the request helper in this module.
client = OpenAI(base_url=_BASE_URL, api_key=_API_KEY)


def api(
    content,
    *,
    model="/home/linweibin/liujian/model/Qwen2.5-72B-Instruct-GPTQ-Int8",
    temperature=0.1,
):
    """Send ``content`` as a single user message and return the reply text.

    Args:
        content: Value placed in the ``content`` field of the one user
            message sent to the chat-completions endpoint.
        model: Model identifier forwarded to the endpoint. Defaults to the
            Qwen2.5-72B-Instruct GPTQ-Int8 path this script was written for.
        temperature: Sampling temperature forwarded to the endpoint.

    Returns:
        The ``message.content`` of the first choice in the response.
    """
    completion = client.chat.completions.create(
        model=model,
        messages=[
            {"role": "user", "content": content},
        ],
        temperature=temperature,
    )
    # Only the first choice is returned; no ``n`` is requested above.
    return completion.choices[0].message.content



# Smoke test: send one prompt and report how long the round trip took.
# NOTE(review): these statements run at import time as well as when the file
# is executed directly; consider a ``__main__`` guard if this module is ever
# imported for its ``api`` helper.
#
# A monotonic clock is used so the measured duration cannot be skewed by
# wall-clock adjustments (NTP steps, DST changes) during the request.
t1 = time.perf_counter()
print(api("福建有几个城市"))
second = time.perf_counter() - t1
print(f"耗时{second}")